In [ ]:
# Disable oneDNN custom operations for TensorFlow. The flag has to be in the
# environment BEFORE `import tensorflow` runs; setting it in a later cell
# (after the import) is too late to take effect.
import os
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
# Project-local helpers. NOTE(review): wildcard imports hide where names such
# as get_dataframes / get_statistics come from; prefer explicit imports once
# the used names are confirmed.
from dataset import *
from plots import *
from metrics import *
from models_functions import *
# Set style for matplotlib
plt.style.use("Solarize_Light2")
# Default renderer used by plotly figures in this notebook
import plotly.io as pio
pio.renderers.default = "notebook_connected"
WARNING:tensorflow:From c:\Users\VG User\Documents\GitHub\MLinAPP-FP01-14\.venv\Lib\site-packages\keras\src\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.
In [ ]:
# Root directories of the dataset, relative to the notebook's working directory:
# one folder for the normal recordings and one for the recordings with collisions.
ROOTDIR_DATASET_NORMAL = '../dataset/normal'
ROOTDIR_DATASET_ANOMALY = '../dataset/collisions'
# TF_ENABLE_ONEDNN_OPTS=0 means that the model will not use the oneDNN library for optimization
# NOTE(review): this flag generally has to be set before TensorFlow is first
# imported in order to take effect; tensorflow is imported in the first cell,
# so verify that the variable is already set by then.
import os
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
Various parameters¶
In [ ]:
# Frequency setting, kept as a string because it is also embedded in the
# feature-folder names below. Other values that have been tried:
# '1.0', '0.1', '0.005'.
freq = '0.01'

# File-name fragments identifying the normal and the collision recordings.
file_name_normal = "_20220811_rbtc_"
file_name_collisions = "_collision_20220811_rbtc_"

# Recording indices belonging to each group.
recording_normal = [0, 2, 3, 4]
recording_collisions = [1, 5]

# Folder-friendly form of the frequency: every '.' becomes '_' (e.g. '0.01' -> '0_01').
freq_str = "_".join(freq.split("."))

# Per-frequency folders for stored features.
features_folder_normal = "./features/normal" + freq_str + "/"
features_folder_collisions = "./features/collisions" + freq_str + "/"
Data¶
In [ ]:
# Build the feature and raw dataframes for every group of recordings.
# The last positional argument is None here; the commented-out variant of this
# cell passes a features folder instead (presumably a cache location --
# confirm in dataset.get_dataframes).

# Normal recordings (third return value is not needed).
(
    df_features_normal,
    df_normal_raw,
    _,
) = get_dataframes(
    ROOTDIR_DATASET_NORMAL, file_name_normal, recording_normal, freq, None
)

# Collision recordings, all together.
(
    df_features_collisions,
    df_collisions_raw,
    df_collisions_raw_action,
) = get_dataframes(
    ROOTDIR_DATASET_ANOMALY, file_name_collisions, recording_collisions, freq, None
)

# Collision recordings, one at a time.
# NOTE(review): recordings 1 and 5 are processed again here although they are
# already part of the combined call above; this repeats the feature extraction.
(
    df_features_collisions_1,
    df_collisions_raw_1,
    df_collisions_raw_action_1,
) = get_dataframes(
    ROOTDIR_DATASET_ANOMALY, file_name_collisions, [1], freq, None
)
(
    df_features_collisions_5,
    df_collisions_raw_5,
    df_collisions_raw_action_5,
) = get_dataframes(
    ROOTDIR_DATASET_ANOMALY, file_name_collisions, [5], freq, None
)
Loading data. Found 31 different actions. Loading data done. Computing features.
Progress: 0% Complete
Skipped feature extraction for pickFromPallet(1,2)=[true,1,0] 2022-08-11 14:37:37.436000 : 2022-08-11 14:37:37.421000. Skipped feature extraction for placeToPallet(1,1)=[true,0] 2022-08-11 14:37:37.421000 : 2022-08-11 14:37:37.442000. Skipped feature extraction for pickFromPallet(3,2)=[true,1,0] 2022-08-11 15:36:32.568000 : 2022-08-11 15:36:32.533000. Skipped feature extraction for pickFromPallet(3,2)=[true,1,0] 2022-08-11 15:36:32.572000 : 2022-08-11 15:36:32.561000. Skipped feature extraction for placeToPallet(1,3)=[true,0] 2022-08-11 15:36:32.533000 : 2022-08-11 15:36:32.572000. Skipped feature extraction for placeToPallet(1,3)=[true,0] 2022-08-11 15:36:32.561000 : 2022-08-11 15:36:32.561000. --- 116.22523260116577 seconds --- Loading data. Found 31 different actions. Loading data done. Computing features.
Progress: 0% Complete
Skipped feature extraction for moveOverPallet(1,3)=[true,0] 2022-08-11 16:55:15.149000 : 2022-08-11 16:55:15.146000. Skipped feature extraction for moveOverPallet(3,1)=[true,0] 2022-08-11 16:55:15.146000 : 2022-08-11 16:55:15.150000. --- 51.63796544075012 seconds --- Loading data. Found 31 different actions. Loading data done. Computing features.
Progress: 0% Complete
--- 30.938878297805786 seconds --- Loading data. Found 31 different actions. Loading data done. Computing features.
Progress: 0% Complete
Skipped feature extraction for moveOverPallet(1,3)=[true,0] 2022-08-11 16:55:15.149000 : 2022-08-11 16:55:15.146000. Skipped feature extraction for moveOverPallet(3,1)=[true,0] 2022-08-11 16:55:15.146000 : 2022-08-11 16:55:15.150000. --- 28.175462007522583 seconds ---
In [ ]:
# Disabled alternative to the loading cell above: the same four get_dataframes
# calls, but with a features folder passed as the last argument instead of None.
# NOTE(review): presumably this reuses features stored in those folders rather
# than recomputing them -- confirm in dataset.get_dataframes before enabling.
# df_features_normal, df_normal_raw, _ = get_dataframes(ROOTDIR_DATASET_NORMAL, file_name_normal, recording_normal, freq, f"{features_folder_normal}")
# df_features_collisions, df_collisions_raw, df_collisions_raw_action = get_dataframes(ROOTDIR_DATASET_ANOMALY, file_name_collisions, recording_collisions, freq, f"{features_folder_collisions}1_5/")
# df_features_collisions_1, df_collisions_raw_1, df_collisions_raw_action_1 = get_dataframes(ROOTDIR_DATASET_ANOMALY, file_name_collisions, [1], freq, f"{features_folder_collisions}1/")
# df_features_collisions_5, df_collisions_raw_5, df_collisions_raw_action_5 = get_dataframes(ROOTDIR_DATASET_ANOMALY, file_name_collisions, [5], freq, f"{features_folder_collisions}5/")
In [ ]:
# Derive train/test sets from the normal features paired with each collision
# feature set: both collision recordings together, then recording 1 and
# recording 5 on their own. full_normal=True is passed in every case.

# Both collision recordings.
(X_train, y_train, X_test, y_test, df_test) = get_train_test_data(
    df_features_normal,
    df_features_collisions,
    full_normal=True,
)

# Collision recording 1 only.
(X_train_1, y_train_1, X_test_1, y_test_1, df_test_1) = get_train_test_data(
    df_features_normal,
    df_features_collisions_1,
    full_normal=True,
)

# Collision recording 5 only.
(X_train_5, y_train_5, X_test_5, y_test_5, df_test_5) = get_train_test_data(
    df_features_normal,
    df_features_collisions_5,
    full_normal=True,
)
Collisions¶
In [ ]:
# Collision metadata for each collision recording, identified by its index
# given as a string.
collisions_rec1, collisions_init1 = get_collisions(
    '1', ROOTDIR_DATASET_ANOMALY
)
collisions_rec5, collisions_init5 = get_collisions(
    '5', ROOTDIR_DATASET_ANOMALY
)

# Stack the per-recording results so both recordings can be handled as one
# dataframe (recording 1 first, then recording 5).
collisions_rec = pd.concat(
    [collisions_rec1, collisions_rec5]
)
collisions_init = pd.concat(
    [collisions_init1, collisions_init5]
)
In [ ]:
# Compute the collision zones and the matching labels for each feature set:
# the combined recordings first, then recording 1 and recording 5 separately.
collisions_zones, y_collisions = get_collisions_zones_and_labels(
    collisions_rec,
    collisions_init,
    df_features_collisions,
)
collisions_zones_1, y_collisions_1 = get_collisions_zones_and_labels(
    collisions_rec1,
    collisions_init1,
    df_features_collisions_1,
)
collisions_zones_5, y_collisions_5 = get_collisions_zones_and_labels(
    collisions_rec5,
    collisions_init5,
    df_features_collisions_5,
)
DAGMM for Anomaly Detection in Time Series Data¶
In [ ]:
from algorithms.dagmm import DAGMM

# Configure the DAGMM detector. Arguments are grouped by theme; their exact
# semantics are defined in algorithms.dagmm.
classifier = DAGMM(
    # --- optimisation ---
    num_epochs=10,
    lr=1e-4,
    batch_size=32,
    seed=42,
    gpu=None,  # None runs on CPU; pass a GPU index to use a GPU if available
    # --- loss weights ---
    lambda_energy=0.1,
    lambda_cov_diag=0.005,
    # --- mixture model / thresholding ---
    gmm_k=5,
    normal_percentile=80,
    # --- autoencoder ---
    sequence_length=30,
    autoencoder_type=DAGMM.AutoEncoder.LSTM,  # LSTM-based autoencoder
    hidden_size=32,
    # Each entry is a 2-tuple -- presumably (encoder, decoder); confirm in
    # algorithms.dagmm.
    autoencoder_args={
        'n_layers': (4, 4),
        'use_bias': (True, True),
        'dropout': (0.1, 0.1),
    },
    # --- reporting ---
    details=True,
)

# Fit the detector on the training set built from the normal recordings.
classifier.fit(X_train)
print("DAGMM training completed.")
100%|██████████| 10/10 [00:25<00:00, 2.53s/it]
DAGMM training completed.
Predictions¶
In [ ]:
# Score each test set with the trained classifier and report statistics using
# the "mad" threshold type. The collision labels (y_collisions*) are passed as
# the labels argument.
# NOTE(review): every df_test* is overwritten with the value returned by
# get_statistics, so re-running this cell feeds the already-processed frame
# back in. Re-run the train/test split cell first when a clean state is needed.
df_test = get_statistics(
    X_test, y_collisions, classifier, df_test, freq,
    threshold_type="mad",
)
df_test_1 = get_statistics(
    X_test_1, y_collisions_1, classifier, df_test_1, freq,
    threshold_type="mad",
)
df_test_5 = get_statistics(
    X_test_5, y_collisions_5, classifier, df_test_5, freq,
    threshold_type="mad",
)
Anomaly prediction completed.
Number of anomalies detected: 0 with threshold 33.560173678666125, std
Number of anomalies detected: 0 with threshold 32.196242037477596, mad
Number of anomalies detected: 16 with threshold 25.325382750609826, percentile
Number of anomalies detected: 0 with threshold 57.75940595152788, IQR
Number of anomalies detected: 197 with threshold 0.0, zero
choosen threshold type: mad, with value: 32.1962
F1 Score: 0.0000
Accuracy: 0.6569
Precision: 0.0000
Recall: 0.0000
precision recall f1-score support
0 0.66 1.00 0.79 201
1 0.00 0.00 0.00 105
accuracy 0.66 306
macro avg 0.33 0.50 0.40 306
weighted avg 0.43 0.66 0.52 306
ROC AUC Score: 0.5204
Anomalies detected: 0
Best threshold: -19.5755 | F1 Score: 0.5147 | Precision: 0.3465 | Recall: 1.0000
Anomalies detected with best threshold: 303
-------------------------------------------------------------------------------------
Anomaly prediction completed.
Number of anomalies detected: 13 with threshold 24.955992948358226, std
Number of anomalies detected: 17 with threshold 16.21054283240262, mad
Number of anomalies detected: 9 with threshold 27.424001065889996, percentile
Number of anomalies detected: 0 with threshold 37.60206255763769, IQR
Number of anomalies detected: 68 with threshold 0.0, zero
choosen threshold type: mad, with value: 16.2105
F1 Score: 0.1154
Accuracy: 0.7195
Precision: 0.1765
Recall: 0.0857
precision recall f1-score support
0 0.78 0.89 0.83 129
1 0.18 0.09 0.12 35
accuracy 0.72 164
macro avg 0.48 0.49 0.47 164
weighted avg 0.65 0.72 0.68 164
ROC AUC Score: 0.5320
Anomalies detected: 17
Best threshold: -14.4213 | F1 Score: 0.3743 | Precision: 0.2303 | Recall: 1.0000
Anomalies detected with best threshold: 152
-------------------------------------------------------------------------------------
Anomaly prediction completed.
Number of anomalies detected: 3 with threshold 24.83768828719799, std
Number of anomalies detected: 8 with threshold 19.777925820151964, mad
Number of anomalies detected: 8 with threshold 20.503467241923016, percentile
Number of anomalies detected: 3 with threshold 25.312643468379974, IQR
Number of anomalies detected: 141 with threshold 0.0, zero
choosen threshold type: mad, with value: 19.7779
F1 Score: 0.0000
Accuracy: 0.5461
Precision: 0.0000
Recall: 0.0000
precision recall f1-score support
0 0.58 0.91 0.71 85
1 0.00 0.00 0.00 56
accuracy 0.55 141
macro avg 0.29 0.45 0.35 141
weighted avg 0.35 0.55 0.43 141
ROC AUC Score: 0.3624
Anomalies detected: 8 Best threshold: 0.2194 | F1 Score: 0.5685 | Precision: 0.3972 | Recall: 1.0000 Anomalies detected with best threshold: 141 -------------------------------------------------------------------------------------
In [ ]:
# True collision zones against the predictions, both collision recordings together.
plot_anomalies_true_and_predicted(
    df_collisions_raw,
    df_collisions_raw_action,
    collisions_zones,
    df_test,
    title="Collisions zones vs predicted zones for both recordings",
)
In [ ]:
# True collision zones against the predictions, recording 1 only.
plot_anomalies_true_and_predicted(
    df_collisions_raw_1,
    df_collisions_raw_action_1,
    collisions_zones_1,
    df_test_1,
    title="Collisions zones vs predicted zones for recording 1",
)
In [ ]:
# True collision zones against the predictions, recording 5 only.
plot_anomalies_true_and_predicted(
    df_collisions_raw_5,
    df_collisions_raw_action_5,
    collisions_zones_5,
    df_test_5,
    title="Collisions zones vs predicted zones for recording 5",
)